Summarize escape across all sera alongside functional effects¶
In [1]:
import altair as alt
import pandas as pd
import polyclonal.alphabets
from polyclonal.plot import color_gradient_hex
# Turn off Altair's limit on the number of data rows a chart may hold; the return
# value is assigned to `_` so the cell displays no output.
_ = alt.data_transformers.disable_max_rows()
The next cell is tagged `parameters` so that papermill can inject the parameter values:
In [2]:
# Placeholder values for the notebook parameters; each one is assigned a real value
# in the "# Parameters" cell that follows.
# CSV read to map each site to its sequential site number and region
site_numbering_map_csv = None
# CSV of mutation functional effects
func_effects_csv = None
# dict mapping each serum name to that serum's mutation-escape CSV
sera = None
# path the merged interactive chart is saved to
chart = None
# path the summary CSV is written to
csv_file = None
In [3]:
# Parameters
# (presumably injected by papermill; these assignments replace the None placeholders
# defined in the previous cell)
# serum name -> CSV holding that serum's mutation escape values
sera = {
"sera1-01": "results/antibody_escape/averages/sera1-01_mut_effect.csv",
"sera1-02": "results/antibody_escape/averages/sera1-02_mut_effect.csv",
"sera1-03": "results/antibody_escape/averages/sera1-03_mut_effect.csv",
"sera1-04": "results/antibody_escape/averages/sera1-04_mut_effect.csv",
"sera1-05": "results/antibody_escape/averages/sera1-05_mut_effect.csv",
"sera2-01": "results/antibody_escape/averages/sera2-01_mut_effect.csv",
"sera2-04": "results/antibody_escape/averages/sera2-04_mut_effect.csv",
"sera2-05": "results/antibody_escape/averages/sera2-05_mut_effect.csv",
}
# input: site numbering / region table
site_numbering_map_csv = "data/site_numbering_map.csv"
# input: mutation functional effects
func_effects_csv = "results/func_effects/averages/293T_entry_func_effects.csv"
# output: merged interactive chart
chart = "results/summaries/escape_summary_nolegend.html"
# output: summary table
csv_file = "results/summaries/escape_summary.csv"
Some configuration for the plots:
In [4]:
# --- filters applied to both the escape and the functional-effect data ---
times_seen = 2 # only include mutations with times_seen >= this
frac_models = 1 # only include mutations in >= this fraction of models / selections
# --- which values to show ---
escape_stat = "escape_median" # column of each serum's CSV that is used as its "escape" value
init_site_escape_stat = "mean" # site escape statistic selected when the chart first loads
init_min_func_effect = -3 # initial position of the minimum-functional-effect slider
init_floor_escape_at_zero = True # initial setting of the "floor escape at zero" radio button
# for heatmap colors
escape_negative_color = "#0072B2" # french blue
escape_positive_color = "#E69F00" # orange
escape_max_at_least = 1 # upper end of the escape color scale is at least this
escape_min_at_least = -1 # used to set the lower end of the escape color scale when escape is not floored at zero
func_positive_color = "#009E73" # green
func_negative_color = "#CC79A7" # wild orchid
func_max_at_least = 1 # upper end of the functional-effect color scale is at least this
func_min_at_least = 0 # lower end of that scale is the smaller of this and the slider value
Read the escape data and add site numbering and functional effect data:
In [5]:
# Read each serum's escape CSV, labelling its rows with the serum name.
per_serum_frames = [
    pd.read_csv(csv_path).assign(serum=serum_name)
    for serum_name, csv_path in sera.items()
]

# Stack all sera, expose the chosen statistic as "escape", and keep only mutations
# that pass the `frac_models` and `times_seen` thresholds.
tidy_columns = ["epitope", "serum", "site", "wildtype", "mutant", "escape"]
escape_tidy = (
    pd.concat(per_serum_frames)
    .rename(columns={escape_stat: "escape"})
    .query("frac_models >= @frac_models")
    .query("times_seen >= @times_seen")
)[tidy_columns]

n_epitopes = escape_tidy["epitope"].nunique()
assert n_epitopes == 1, "averaging only works for one epitope"
# Wide table: one row per (site, wildtype, mutant) and one column per serum.
escape_wide = escape_tidy.pivot_table(
    index=["site", "wildtype", "mutant"],
    columns="serum",
    values="escape",
).reset_index()

# `site_mutant` (e.g. "2A") is the key the charts use to look up functional effects.
escape = escape_wide.assign(
    site_mutant=escape_wide["site"].astype(str) + escape_wide["mutant"]
)

# every mutation must appear exactly once
assert len(escape) == escape["site_mutant"].nunique()
# Site numbering table: the reference site (renamed to "site"), its sequential
# number, and the region it belongs to.
site_numbering_raw = pd.read_csv(site_numbering_map_csv)
site_numbering_map = site_numbering_raw.rename(columns={"reference_site": "site"})[
    ["site", "sequential_site", "region"]
]
# Functional effects of mutations. The `times_seen` filter is applied first, so the
# fraction of selections is computed relative to the largest `n_selections` among
# the mutations that survive that filter.
func_effects_seen = (
    pd.read_csv(func_effects_csv)
    .rename(columns={"effect": "functional effect"})
    .query("times_seen >= @times_seen")
)
func_effects_measured = (
    func_effects_seen
    .assign(
        frac_selections=(
            func_effects_seen["n_selections"] / func_effects_seen["n_selections"].max()
        )
    )
    .query("frac_selections >= @frac_models")
)[["site", "wildtype", "mutant", "functional effect"]]

# add wildtype functional effects of zero
func_effects_wildtype = (
    func_effects_measured[["site", "wildtype"]]
    .drop_duplicates()
    .assign(
        mutant=lambda df: df["wildtype"],
        **{"functional effect": 0},
    )
)

# combine, build the `site_mutant` lookup key, and attach site numbering / region
func_effects = (
    pd.concat([func_effects_measured, func_effects_wildtype], ignore_index=True)
    .assign(site_mutant=lambda df: df["site"].astype(str) + df["mutant"])
    .merge(site_numbering_map, on="site", validate="many_to_one")
)
assert len(func_effects) == func_effects["site_mutant"].nunique()

escape
Out[5]:
| serum | site | wildtype | mutant | sera1-01 | sera1-02 | sera1-03 | sera1-04 | sera1-05 | sera2-01 | sera2-04 | sera2-05 | site_mutant |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | E | A | 0.01300 | -0.082410 | -0.02091 | -0.158800 | -0.212200 | -0.12620 | -0.15580 | 0.06117 | 2A |
| 1 | 2 | E | C | 0.05770 | -0.007879 | 0.05043 | 0.057150 | -0.006042 | 0.07697 | 0.05997 | 0.03985 | 2C |
| 2 | 2 | E | D | 0.22130 | 0.288800 | 0.15600 | -0.087910 | 0.026780 | 0.26090 | 0.01403 | 0.15300 | 2D |
| 3 | 2 | E | F | -0.03430 | 0.127500 | 0.05859 | -0.007849 | 0.046290 | 0.07214 | 0.02264 | -0.01301 | 2F |
| 4 | 2 | E | G | 0.02541 | 0.091050 | -0.04513 | -0.020370 | 0.005345 | -0.07698 | 0.02925 | -0.29270 | 2G |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4476 | 568 | * | P | -0.33150 | -0.166400 | -0.14920 | -0.101800 | -0.324600 | -0.20240 | -0.13620 | -0.52120 | 568P |
| 4477 | 568 | * | Q | NaN | NaN | NaN | NaN | NaN | -0.51620 | NaN | -0.27450 | 568Q |
| 4478 | 568 | * | T | NaN | NaN | NaN | NaN | NaN | -0.08073 | NaN | 0.24120 | 568T |
| 4479 | 568 | * | V | -0.44540 | -0.486700 | -0.28670 | -0.209100 | -0.526300 | -0.41590 | -0.32580 | -0.92130 | 568V |
| 4480 | 568 | * | W | -0.17250 | -0.413100 | -0.18020 | -0.040380 | NaN | -0.64430 | 0.19360 | -0.78800 | 568W |
4481 rows × 12 columns
Now make a site summary escape plot for all the sera:
In [6]:
# Radio buttons choosing whether negative escape values are replaced by zero.
# The `name` is referenced verbatim inside expression strings later in the notebook,
# so it must stay in sync with them.
floor_escape_at_zero = alt.param(
value=init_floor_escape_at_zero,
name="floor_escape_at_zero",
bind=alt.binding_radio(options=[True, False], name="floor escape at zero"),
)
# per-site summary statistics computed from the mutation-level escape values
site_stats = ["mean", "sum", "max", "min"]
# dropdown choosing which of the site statistics is plotted
site_escape_selection = alt.selection_point(
fields=["site escape statistic"],
bind=alt.binding_select(
options=site_stats,
name="site escape statistic",
),
value=init_site_escape_stat,
)
# site currently under the mouse; with empty=False nothing is selected until hover
site_selection = alt.selection_point(fields=["site"], on="mouseover", empty=False)
# Slider for the minimum functional effect a mutation must have to be included;
# it ranges from the smallest functional effect in the data up to 0. As above, the
# `name` is referenced in expression strings later in the notebook.
func_effects_slider = alt.param(
value=init_min_func_effect,
name="func_effects_slider",
bind=alt.binding_range(
name="minimum mutation functional effect",
min=func_effects["functional effect"].min(),
max=0,
),
)
# horizontal brush over sites, drawn as a black outline with no fill
site_brush = alt.selection_interval(
encodings=["x"],
mark=alt.BrushConfig(stroke="black", strokeWidth=2, fillOpacity=0),
empty=True,
)
site_escape_width = 600 # width of site escape chart
# Shared encodings for the site escape panels: escape on the y-axis plus tooltips.
# The x encoding is added later by each panel.
site_escape_base = (
alt.Chart(escape)
.encode(
y=alt.Y(
"escape:Q",
scale=alt.Scale(nice=False, padding=5),
axis=alt.Axis(grid=False),
),
tooltip=[
"site",
alt.Tooltip("escape:Q", format=".2f"),
"wildtype",
"sequential_site:Q",
"serum:N",
"region:N",
],
)
)
# thin line through the site values
site_escape_lines = site_escape_base.mark_line(size=0.75, opacity=1)
# one point per site; the red outline has width 3 for the site under the mouse and
# width 0 (invisible) otherwise
site_escape_points = site_escape_base.encode(
strokeWidth=alt.condition(site_selection, alt.value(3), alt.value(0)),
).mark_circle(filled=True, opacity=1, stroke="red", size=20)
# Layer lines and points, then apply the data transforms. The transforms run in the
# order written, and later steps rely on fields created by earlier ones.
site_escape_lines_and_points = (
(site_escape_lines + site_escape_points)
# wide -> long: one row per (mutation, serum) with that serum's value in `escape_orig`
.transform_fold(fold=list(sera), as_=["serum", "escape_orig"])
# floor escape at zero if selected
# NOTE(review): when `escape_orig` is null for a serum, `max(null, 0)` presumably
# evaluates to 0, so missing values may count as zeros only while flooring is on;
# confirm this is intended.
.transform_calculate(
escape=alt.expr.if_(
floor_escape_at_zero,
alt.expr.max(alt.datum["escape_orig"], 0),
alt.datum["escape_orig"],
)
)
# filter on functional effects
.transform_lookup(
lookup="site_mutant",
from_=alt.LookupData(
func_effects,
key="site_mutant",
fields=["functional effect"],
),
)
.transform_filter(alt.datum["functional effect"] >= func_effects_slider)
# compute site statistics from mutation statistics
.transform_aggregate(
**{stat: f"{stat}(escape)" for stat in site_stats},
groupby=["site", "serum", "wildtype"],
)
# filter on site statistic of interest
.transform_fold(fold=site_stats, as_=["site escape statistic", "escape"])
.transform_filter(site_escape_selection)
# get sequential sites and regions
.transform_lookup(
lookup="site",
from_=alt.LookupData(
site_numbering_map,
key="site",
fields=["sequential_site", "region"],
),
)
)
# The three stacked site panels below (per-serum escape, mean of sera, region bar)
# must order their x-axis identically. Each sorts the nominal `site` axis by the
# `sequential_site` field. The sort field is given as a bare field name, exactly as
# the heatmap cells do: a type suffix such as ":Q" would be taken as part of the
# field name and so would not refer to any field in the data.

# Per-serum site escape: one gray facet row per serum.
site_escape = (
    site_escape_lines_and_points
    .encode(
        x=alt.X(
            "site:N",
            sort=alt.SortField("sequential_site"),
            axis=alt.Axis(labelOverlap=True, grid=False),
        ),
        # sites outside the brush are dimmed
        opacity=alt.condition(site_brush, alt.value(1), alt.value(0.4)),
        color=alt.value("gray"),
    )
    .properties(height=45, width=site_escape_width)
    .facet(
        facet=alt.Facet(
            "serum:N",
            title="individual sera",
            header=alt.Header(
                labelOrient="right",
                labelFontSize=10,
                labelPadding=3,
                titleOrient="right",
                titlePadding=3,
            ),
        ),
        columns=1,
        spacing=0,
    )
)

# Mean of the per-serum site values, drawn in black above the individual sera.
site_mean_escape = (
    site_escape_lines_and_points
    # average missing values as zero
    .transform_calculate(
        escape=alt.expr.if_(
            alt.expr.isValid(alt.datum["escape"]),
            alt.datum["escape"],
            0,
        ),
    )
    # take mean over sera
    .transform_aggregate(
        escape="mean(escape)",
        groupby=["site", "wildtype", "sequential_site", "region"],
    )
    # give the aggregated rows a `serum` value so the shared tooltip still works
    .transform_calculate(serum="'mean of all sera'")
    .encode(
        x=alt.X(
            "site:N",
            sort=alt.SortField("sequential_site"),
            axis=None,
        ),
        opacity=alt.condition(site_brush, alt.value(1), alt.value(0.4)),
        color=alt.value("black"),
    )
    .properties(
        height=70,
        width=site_escape_width,
        title=alt.TitleParams(
            "mean of sera", fontSize=11, fontWeight="bold", orient="right",
        ),
    )
)

# Thin bar colored by region; the color domain lists regions in their order of
# first appearance in the site numbering table.
region_bar = (
    alt.Chart(site_numbering_map)
    .encode(
        x=alt.X(
            "site:N",
            sort=alt.SortField("sequential_site"),
            axis=None,
        ),
        color=alt.Color(
            "region",
            scale=alt.Scale(domain=site_numbering_map["region"].unique()),
        ),
        tooltip=["site", "region", "sequential_site"],
    )
    .mark_rect()
    .properties(width=site_escape_width, height=9)
)
# Stack the mean-of-sera panel over the per-serum panels; the interactive controls
# are attached to this inner stack.
site_escape_panels = alt.vconcat(
    site_mean_escape,
    site_escape,
    spacing=3,
).add_params(
    site_escape_selection,
    site_selection,
    func_effects_slider,
    floor_escape_at_zero,
)

# Put the region bar on top; the site brush is attached to the whole site chart.
site_chart = alt.vconcat(
    region_bar,
    site_escape_panels,
    spacing=0,
).add_params(site_brush)

site_chart
Out[6]:
Now prepare to plot the heatmaps. First, create a data frame that has the functional effects and the average escape across sera (averaging mutations missing for a serum as zero for that serum):
In [7]:
serum_columns = list(sera)

# one wildtype row per site (mutant == wildtype); its escape is set to zero below
wildtype_escape_rows = (
    escape[["site", "wildtype"]]
    .drop_duplicates()
    .assign(mutant=lambda df: df["wildtype"])
)

# Missing per-serum values are filled with zero before averaging, so a mutation
# missing for a serum counts as zero escape for that serum.
escape_filled = pd.concat(
    [escape, wildtype_escape_rows],
    ignore_index=True,
).fillna(0)
mean_escape = (
    escape_filled
    .assign(escape=escape_filled[serum_columns].mean(axis=1))
    .drop(columns=[*serum_columns, "site_mutant"])
)

# Outer-join the functional effects (shared columns: site, wildtype, mutant) so that
# mutations present in only one of the two data sets are kept, then re-attach the
# site numbering so every remaining row has `sequential_site` and `region`.
escape_and_func = (
    mean_escape
    .merge(func_effects, validate="one_to_one", how="outer")
    .drop(columns=["sequential_site", "region", "site_mutant"])
    .merge(site_numbering_map, validate="many_to_one")
)

# wildtype always has zero escape, including wildtype rows that came only from the
# functional effects table
is_mutation = escape_and_func["wildtype"] != escape_and_func["mutant"]
heatmap_data = escape_and_func.assign(
    escape=escape_and_func["escape"].where(is_mutation, 0)
)
Write these data to a CSV:
In [8]:
print(f"Writing summary data to {csv_file}")

# mean escape at each site, averaged over its non-wildtype mutations
mean_escape_by_site = (
    heatmap_data
    .query("wildtype != mutant")
    .groupby("site", as_index=False)
    .agg(mean_site_escape=("escape", "mean"))
)

# attach the per-site mean to every mutation row and write the table
summary_table = heatmap_data.merge(
    mean_escape_by_site,
    how="outer",
    validate="many_to_one",
)
summary_table.to_csv(csv_file, index=False, float_format="%.4g")
Writing summary data to results/summaries/escape_summary.csv
Make heatmaps:
In [9]:
cell_size = 9  # heatmap cell size

# amino acids on the y-axis, in the order given by `biochem_order_aas`
alphabet = polyclonal.alphabets.biochem_order_aas(func_effects["mutant"].unique())

# Base chart shared by all heatmap layers.
heatmap_base = (
    alt.Chart(heatmap_data)
    .transform_calculate(
        # Value used to color the escape heatmap. Flooring is applied only to valid
        # escape values: `max(null, 0)` is 0, so flooring a missing value would draw
        # an unmeasured mutation as zero escape instead of leaving it missing (and
        # would disagree with the behaviour when flooring is turned off).
        # This must be computed before `escape` itself is rewritten below.
        escape_floored=alt.expr.if_(
            floor_escape_at_zero,
            alt.expr.if_(
                alt.expr.isValid(alt.datum["escape"]),
                alt.expr.max(alt.datum["escape"], 0),
                alt.datum["escape"],
            ),
            alt.datum["escape"],
        ),
        # convert null values to NaN so they show as NaN in tooltips rather than as 0.0
        **{
            col: alt.expr.if_(
                alt.expr.isFinite(alt.datum[col]),
                alt.datum[col],
                alt.expr.NaN,
            )
            for col in ["escape", "functional effect"]
        }
    )
    .encode(
        x=alt.X(
            "site:N",
            sort=alt.SortField("sequential_site"),
            axis=alt.Axis(labelFontSize=9, ticks=False),
        ),
        y=alt.Y(
            "mutant:N",
            title="amino acid",
            sort=alphabet,
            axis=alt.Axis(labelFontSize=9, ticks=False),
        ),
    )
    # square cells of `cell_size` pixels in both directions
    .properties(width=alt.Step(cell_size), height=alt.Step(cell_size))
    .add_params(func_effects_slider, floor_escape_at_zero)
)
# "x" drawn on the wildtype amino acid of every site
is_wildtype_cell = alt.datum["wildtype"] == alt.datum["mutant"]
heatmap_wildtype = (
    heatmap_base
    .transform_filter(is_wildtype_cell)
    .mark_text(text="x", color="black")
)

# Gray rectangle behind every (site, amino acid) cell, so cells with no data drawn
# on top of them remain gray. The impute step adds a row for each amino acid in
# `alphabet` that is absent for a site.
heatmap_bg = (
    heatmap_base
    .transform_impute(
        impute="_stat_dummy",
        key="mutant",
        keyvals=alphabet,
        groupby=["site"],
        value=None,
    )
    .mark_rect(color="#E0E0E0")
)

# tooltip fields shared by the heatmap layers; numeric values use two decimals
two_decimal_tooltips = [
    alt.Tooltip(field, format=".2f") for field in ("escape", "functional effect")
]
tooltips = [
    "site",
    "mutant",
    *two_decimal_tooltips,
    "wildtype",
    "sequential_site",
    "region",
]

# color-gradient legend settings shared by the escape and functional-effect heatmaps
legend = alt.Legend(
    orient="left",
    titleOrient="left",
    gradientLength=100,
    gradientThickness=10,
    gradientStrokeColor="black",
    gradientStrokeWidth=0.5,
)
# heatmap for escape

# Diverging palette: negative color -> white -> positive color. The duplicate white
# at the join of the two gradients is dropped.
escape_color_range = (
    color_gradient_hex(escape_negative_color, "white", n=20)
    + color_gradient_hex("white", escape_positive_color, n=20)[1:]
)

# Limits of the color domain. Both are "at least" limits: they are widened when the
# data extend beyond them, so no cell falls outside the color scale. The configured
# constant is passed first so that a NaN data extreme (no escape values at all)
# leaves the configured limit in place.
escape_domain_max = max(escape_max_at_least, heatmap_data["escape"].max())
escape_domain_min = min(escape_min_at_least, float(heatmap_data["escape"].min()))

escape_heatmap = (
    heatmap_base
    # show mutations at or above the functional-effect cutoff, plus all wildtype cells
    .transform_filter(
        (alt.datum["functional effect"] >= func_effects_slider)
        | (alt.datum["wildtype"] == alt.datum["mutant"])
    )
    .encode(
        # turn off x-labels for this heatmap since it is stacked
        x=alt.X(
            "site:N",
            sort=alt.SortField("sequential_site"),
            title=None,
            axis=alt.Axis(ticks=False, labels=False),
        ),
        color=alt.Color(
            "escape_floored:Q",
            title="escape",
            legend=legend,
            scale=alt.Scale(
                zero=True,
                nice=False,
                type="linear",
                domainMid=0,
                domainMax=escape_domain_max,
                # the lower limit is 0 while escape is floored at zero
                domainMin=alt.ExprRef(
                    f"if(floor_escape_at_zero, 0, {escape_domain_min})"
                ),
                range=escape_color_range,
            ),
        ),
        tooltip=tooltips,
    )
    .mark_rect(stroke="black")
)
# heatmap for func effect filtered escape: non-wildtype mutations whose functional
# effect is below the slider cutoff are drawn in a single silver color
below_func_cutoff = (
    (alt.datum["functional effect"] < func_effects_slider)
    & (alt.datum["wildtype"] != alt.datum["mutant"])
)
silver_fill = alt.Color(
    "filtered:N",
    title=["functionally", "deleterious"],
    scale=alt.Scale(range=["silver"]),
    legend=None,
)
escape_func_filtered_heatmap = (
    heatmap_base
    .transform_filter(below_func_cutoff)
    # constant field that the one-color nominal scale is keyed on
    .transform_calculate(filtered="''")
    .encode(tooltip=tooltips, color=silver_fill)
    .mark_rect(stroke="black")
)
# heatmap for functional effects

# diverging palette: negative color -> white -> positive color (the duplicate white
# at the join is dropped)
func_color_range = (
    color_gradient_hex(func_negative_color, "white", n=20)
    + color_gradient_hex("white", func_positive_color, n=20)[1:]
)
func_domain_max = max(func_max_at_least, heatmap_data["functional effect"].max())

func_color_scale = alt.Scale(
    zero=True,
    nice=False,
    type="linear",
    # values outside the domain are clamped to its ends
    clamp=True,
    domainMid=0,
    domainMax=func_domain_max,
    # lower limit follows the slider, but is never above `func_min_at_least`
    domainMin=alt.ExprRef(f"min(func_effects_slider, {func_min_at_least})"),
    range=func_color_range,
)

func_heatmap = (
    heatmap_base
    .encode(
        color=alt.Color(
            "functional effect",
            legend=legend,
            scale=func_color_scale,
        ),
        tooltip=tooltips,
    )
    .mark_rect(stroke="black")
)
# Layer order within each panel is bottom to top: gray background, colored cells,
# then the wildtype "x" markers.
escape_panel = (
    heatmap_bg + escape_heatmap + escape_func_filtered_heatmap + heatmap_wildtype
)
func_panel = heatmap_bg + func_heatmap + heatmap_wildtype

# stack the two panels; each keeps its own color scale
heatmap = alt.vconcat(
    escape_panel,
    func_panel,
    spacing=1,
).resolve_scale(color="independent")

heatmap
Out[9]:
Make merged chart with everything:
In [10]:
# the heatmaps show only the sites inside the brush drawn on the site chart
brushed_heatmap = heatmap.transform_filter(site_brush)

merged_chart = alt.vconcat(
    site_chart,
    brushed_heatmap,
    spacing=5,
).configure_legend(orient="left")

print(f"Saving to {chart}")
merged_chart.save(chart)

merged_chart
Saving to results/summaries/escape_summary_nolegend.html
Out[10]:
In [ ]: